{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Explicit names instead of a wildcard import, so it is clear where each\n",
    "# transform used below comes from.\n",
    "from awsglue.transforms import ApplyMapping, DropNullFields, ResolveChoice\n",
    "from awsglue.utils import getResolvedOptions\n",
    "from pyspark.context import SparkContext\n",
    "from awsglue.context import GlueContext\n",
    "from awsglue.job import Job\n",
    "\n",
    "# Glue Data Catalog source and S3 target used by this job.\n",
    "SOURCE_DATABASE = \"db_tlc_etl_green_car\"\n",
    "SOURCE_TABLE = \"green\"\n",
    "TARGET_PATH = \"s3://data-etl-o-target-0/result\"\n",
    "\n",
    "# (column name, type) for every column carried through the job. ApplyMapping\n",
    "# below keeps each column's name and type unchanged.\n",
    "COLUMN_TYPES = [\n",
    "    (\"vendorid\", \"long\"),\n",
    "    (\"lpep_pickup_datetime\", \"string\"),\n",
    "    (\"lpep_dropoff_datetime\", \"string\"),\n",
    "    (\"ratecodeid\", \"long\"),\n",
    "    (\"pulocationid\", \"long\"),\n",
    "    (\"dolocationid\", \"long\"),\n",
    "    (\"passenger_count\", \"long\"),\n",
    "    (\"trip_distance\", \"double\"),\n",
    "    (\"fare_amount\", \"double\"),\n",
    "    (\"extra\", \"double\"),\n",
    "    (\"mta_tax\", \"double\"),\n",
    "    (\"tip_amount\", \"double\"),\n",
    "    (\"tolls_amount\", \"double\"),\n",
    "    (\"improvement_surcharge\", \"double\"),\n",
    "    (\"total_amount\", \"double\"),\n",
    "    (\"payment_type\", \"long\"),\n",
    "    (\"trip_type\", \"long\"),\n",
    "]\n",
    "\n",
    "# Job parameters: JOB_NAME is the only argument resolved from sys.argv.\n",
    "args = getResolvedOptions(sys.argv, [\"JOB_NAME\"])\n",
    "\n",
    "# Spark / Glue session setup and job initialisation.\n",
    "sc = SparkContext()\n",
    "glueContext = GlueContext(sc)\n",
    "spark = glueContext.spark_session\n",
    "job = Job(glueContext)\n",
    "job.init(args[\"JOB_NAME\"], args)\n",
    "\n",
    "# Step 0 - read the source table from the Glue Data Catalog.\n",
    "datasource0 = glueContext.create_dynamic_frame.from_catalog(\n",
    "    database=SOURCE_DATABASE,\n",
    "    table_name=SOURCE_TABLE,\n",
    "    transformation_ctx=\"datasource0\",\n",
    ")\n",
    "\n",
    "# Step 1 - select the columns listed in COLUMN_TYPES. Each mapping tuple is\n",
    "# (source name, source type, target name, target type); source and target are\n",
    "# identical here.\n",
    "applymapping1 = ApplyMapping.apply(\n",
    "    frame=datasource0,\n",
    "    mappings=[(name, dtype, name, dtype) for name, dtype in COLUMN_TYPES],\n",
    "    transformation_ctx=\"applymapping1\",\n",
    ")\n",
    "\n",
    "# Step 2 - resolve ambiguous (choice) column types. Per the AWS Glue docs,\n",
    "# \"make_struct\" represents such a column as a struct holding each candidate type.\n",
    "resolvechoice2 = ResolveChoice.apply(\n",
    "    frame=applymapping1,\n",
    "    choice=\"make_struct\",\n",
    "    transformation_ctx=\"resolvechoice2\",\n",
    ")\n",
    "\n",
    "# Step 3 - drop null fields (per the AWS Glue docs: fields whose type is NullType).\n",
    "dropnullfields3 = DropNullFields.apply(\n",
    "    frame=resolvechoice2,\n",
    "    transformation_ctx=\"dropnullfields3\",\n",
    ")\n",
    "\n",
    "# Step 4 - write the result to S3 in Parquet format.\n",
    "datasink4 = glueContext.write_dynamic_frame.from_options(\n",
    "    frame=dropnullfields3,\n",
    "    connection_type=\"s3\",\n",
    "    connection_options={\"path\": TARGET_PATH},\n",
    "    format=\"parquet\",\n",
    "    transformation_ctx=\"datasink4\",\n",
    ")\n",
    "\n",
    "# Mark the job run as complete.\n",
    "job.commit()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Result:\n",
    "\n",
    "Wait for the job to finish; it writes the target Parquet files to\n",
    "`s3://data-etl-o-target-0/result/*.parquet`"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
